# -----
# supported by liukun
# -----

from sklearn import preprocessing
import pandas as pd
import numpy as np

# Input/output locations, relative to the working directory.
INPUT_FEATURE_PATH1 = "../data/train_feature.csv"    # training features
INPUT_LABEL_PATH1 = "../data/train_label.csv"        # training labels
OUTPUT_PATH1 = "../data/train_processed.csv"         # processed training output
INPUT_FEATURE_PATH2 = "../data/test_feature.csv"     # test features
OUTPUT_PATH2 = "../data/test_processed.csv"          # processed test output

# Load the input files ---------------------------------------------------
train_df = pd.read_csv(INPUT_FEATURE_PATH1)
test_df = pd.read_csv(INPUT_FEATURE_PATH2)
train_la = pd.read_csv(INPUT_LABEL_PATH1)

# Discard the last 3 * 8 = 24 rows of the test features.
test_df = test_df.iloc[:len(test_df) - 3 * 8]

# Stack train on top of test so both are normalized with the same statistics.
# The index is intentionally not reset: the test rows keep their own labels
# (0, 1, 2, ...), which the output stage further down uses for row lookups.
all_data = pd.concat([train_df, test_df], axis=0)
# Choose the column-normalization method ----------------------------------
# Two scalers are available: min-max scaling and z-score standardization.
# Point `temp` at whichever one should be used.
# Column-wise normalization -----------------------------------------------
def max_min_scaler(x):
    """Linearly rescale x so that its minimum maps to 0 and its maximum to 1."""
    lowest = np.min(x)
    return (x - lowest) / (np.max(x) - lowest)


def z_score(x):
    """Centre x on its mean and divide by its sample standard deviation (ddof=1)."""
    return (x - np.mean(x)) / (np.std(x, ddof=1))


temp = z_score

# Feature columns to normalize, in the order they appear in the output.
FEATURE_NAMES = ['辐照度', '风速', '风向', '温度', '湿度', '气压']

# Normalize every feature on its own, then put the untouched date column
# back in front of the normalized features.
scaled_columns = [all_data[[name]].apply(temp) for name in FEATURE_NAMES]
all_data_gy = pd.concat([all_data["日期"], *scaled_columns], axis=1)

# Optional extra normalization step (described by the original author as
# row normalization); leave commented out to skip it.
# NOTE(review): preprocessing.scale defaults to axis=0, which standardizes
# each column rather than each row - confirm the intended axis before enabling.
# all_data_gy_sz=all_data_gy.loc[:,['辐照度','风速','风向','温度','湿度','气压']].values
# all_data_gy_sz= preprocessing.scale(all_data_gy_sz)
# temp=pd.DataFrame(all_data_gy_sz,columns=['辐照度','风速','风向','温度','湿度','气压'],index=all_data_gy.index)
# print(temp)
# print(all_data_gy[['日期']])
# all_data_gy=pd.concat([all_data_gy[['日期']],temp],axis=1)
# print(all_data_gy.head())

# print(all_data_gy_sz)
# # print(all_data_gy)
# Split the normalized data back into its train and test parts -------------
# The training rows were concatenated first, so the boundary is simply the
# number of training rows.  Deriving it from train_df (instead of a fixed
# row count) keeps the split correct when the training file changes size.
# .iloc makes the positional slicing explicit, since the concatenated index
# contains repeated labels.
train_row_count = len(train_df)
train_df_gy = all_data_gy.iloc[:train_row_count]
test_df_gy = all_data_gy.iloc[train_row_count:]
print(test_df_gy.head())
print(train_df_gy.head())
# Assemble one output line per date and write the processed files ----------
# Feature columns appended to each output line, in this order.
_FEATURE_COLUMNS = ['辐照度', '风速', '风向', '温度', '湿度', '气压']
# Number of consecutive feature rows that map onto one row of the label file.
_ROWS_PER_LABEL = 8


def _collect_rows_by_date(frame, labels=None):
    """Group the feature values of *frame* by the value of its date column.

    Rows are visited in their stored order.  The first time a date is seen a
    new field list is started with the date itself, followed (only when
    *labels* is given) by ``labels[row_position // _ROWS_PER_LABEL]``.  Every
    row then appends its six feature values to the list of its date.

    Args:
        frame: DataFrame holding the date column and the six feature columns.
        labels: optional positionally indexable sequence of label values;
            ``None`` produces lines without a label field.

    Returns:
        dict mapping each date to its list of string fields, in order of
        first appearance.
    """
    # Work on plain arrays and walk them by position: this avoids one
    # label-based lookup per cell and does not depend on the frame's index.
    dates = frame["日期"].to_numpy()
    feature_arrays = [frame[name].to_numpy() for name in _FEATURE_COLUMNS]

    grouped = {}
    for position, (rq, *values) in enumerate(zip(dates, *feature_arrays)):
        if rq not in grouped:
            grouped[rq] = [str(rq)]
            if labels is not None:
                # Integer division keeps the label index an exact int.
                grouped[rq].append(str(labels[position // _ROWS_PER_LABEL]))
        grouped[rq].extend(str(value) for value in values)
    return grouped


def _write_rows(path, grouped):
    """Write each field list in *grouped* to *path* as one comma-separated line."""
    # The context manager closes the file even if a write fails.
    with open(path, 'w') as fout:
        fout.writelines("%s\n" % ",".join(fields) for fields in grouped.values())


# Training output: date, label, then the features of every row of that date.
_write_rows(
    OUTPUT_PATH1,
    _collect_rows_by_date(train_df_gy, train_la['电场实际太阳辐射指数'].to_numpy()),
)

# Test output: same layout, without the label field.
_write_rows(OUTPUT_PATH2, _collect_rows_by_date(test_df_gy))
print("completed")